from pyspark.sql import Row
from pyspark.sql.functions import desc
from pyspark.sql import SparkSession

def main() -> None:
    """Demonstrate the main PySpark DataFrame join variants.

    Builds small in-memory DataFrames and shows, in order:
    1. an inner join on a single shared column name,
    2. an inner join on multiple shared column names,
    3. an outer join with an explicit equality condition,
    4. an outer join with a list of equality conditions.

    All output goes to stdout via ``DataFrame.show()``.
    """
    spark = (
        SparkSession.builder
        .appName("join_learn")
        .master("local[*]")
        .getOrCreate()
    )
    # Ensure the session is always released, even if a join/show raises.
    try:
        df = spark.createDataFrame([(2, "Alice"), (5, "Bob")]).toDF("age", "name")
        df2 = spark.createDataFrame([Row(height=80, name="Tom"), Row(height=85, name="Bob")])
        df3 = spark.createDataFrame([Row(age=2, name="Alice"), Row(age=5, name="Bob")])
        df4 = spark.createDataFrame([
            Row(age=10, height=80, name="Alice"),
            Row(age=5, height=None, name="Bob"),
            Row(age=None, height=None, name="Tom"),
            Row(age=None, height=None, name=None),
        ])

        # Inner join on a single column name shared by both sides.
        df.join(df2, 'name').select(df.name, df2.height).show()
        # Inner join on multiple shared column names; only Bob matches on both.
        df.join(df4, ['name', 'age']).select(df.name, df.age).show()

        # Full outer join with an explicit equality condition; unmatched rows
        # from either side appear with nulls in the other side's columns.
        df.join(df2, df.name == df2.name, 'outer').select(
            df.name, df2.height).sort(desc("name")).show()
        # Full outer join with a list of conditions (implicitly AND-ed).
        df.join(
            df3,
            [df.name == df3.name, df.age == df3.age],
            'outer'
        ).select(df.name, df3.age).show()
    finally:
        spark.stop()


if __name__ == '__main__':
    main()
